<?php
/**
* DedeCMS 正文失效图片清理工具(最终版)
* 功能:扫描指定ID范围内的文章正文,检测并移除失效的<img>标签
* 检测机制:状态码 + Content-Type + 图片真实格式/最小大小
*/
require_once(dirname(__FILE__) . "/include/common.inc.php");
// Original author's intent: bypass the Dede security check. The flag is only
// set when $cfg_db_type is not defined after common.inc.php has been loaded.
if (!isset($cfg_db_type)) {
    $GLOBALS['safe_check'] = false;
}
// NOTE(review): nothing visible in this file verifies that the caller is an
// authenticated administrator, yet the script rewrites article bodies.
// Confirm that access is restricted elsewhere (web-server rule, admin dir, ...).

// Request parameters. Everything is forced to an integer so the values can be
// embedded in SQL and HTML further down without additional escaping.
$startid = isset($_GET['startid']) ? intval($_GET['startid']) : 0;
$endid = isset($_GET['endid']) ? intval($_GET['endid']) : 0;
// A batch must cover at least one ID; zero or a negative value would produce
// an empty ID window and the run would never touch any article.
$step = max(1, isset($_GET['step']) ? intval($_GET['step']) : 50);
// cURL treats a timeout of 0 as "wait forever"; keep at least one second so a
// dead image host cannot hang the request.
$timeout = max(1, isset($_GET['timeout']) ? intval($_GET['timeout']) : 5);
// Minimum number of bytes for an image to count as valid; negative is meaningless.
$min_size = max(0, isset($_GET['min_size']) ? intval($_GET['min_size']) : 1024);
// Page chrome: document head, inline stylesheet and the opening of the toolbar.
// The fragments are concatenated without separators so the emitted markup is
// one continuous string.
$page_head = array(
    "<!DOCTYPE html><html><head><meta charset='utf-8'><title>清理正文失效图片</title>",
    "<style>body{font-family:system-ui,sans-serif;padding:20px;max-width:1200px;margin:0 auto;background:#f5f7fb;}",
    ".toolbar{background:#fff;padding:15px 20px;border-radius:12px;box-shadow:0 1px 3px rgba(0,0,0,0.05);margin-bottom:20px;}",
    ".log{background:#fff;border-radius:12px;padding:20px;box-shadow:0 1px 3px rgba(0,0,0,0.05);}",
    ".log-item{font-family:monospace;font-size:13px;padding:6px 0;border-bottom:1px solid #eee;}",
    ".success{color:#2e7d32;} .error{color:#c62828;} .info{color:#0d47a1;}",
    "button, input[type='submit']{background:#0f3b5c;color:white;border:none;padding:8px 18px;border-radius:6px;cursor:pointer;}",
    "button:hover{background:#1e4a6e;} input[type='number']{padding:6px 10px;border:1px solid #ccc;border-radius:6px;width:100px;}",
    "</style></head><body>",
    "<div class='toolbar'><h2>🧹 清理正文失效图片(最终版·三重检测)</h2>",
);
echo implode('', $page_head);

// When neither end of the ID range was supplied, show the start form and stop;
// no scanning happens on this request.
$no_range_given = ($startid == 0 && $endid == 0);
if ($no_range_given) {
    echo <<<'FORM'
<form method="get" action="">
<label>起始ID: <input type="number" name="startid" value="1" required></label>
<label>截止ID: <input type="number" name="endid" value="2000" required></label>
<label>每批处理: <input type="number" name="step" value="50"></label>
<label>超时(秒): <input type="number" name="timeout" value="5"></label>
<label>最小图片(字节): <input type="number" name="min_size" value="1024"></label>
<input type="submit" value="开始清理">
</form>
<p style="font-size:13px;color:#666;">🔍 三重检测:HTTP状态码 + Content-Type + 真实图片格式/最小大小。最小图片建议设为1024字节,可避免占位图误判为有效。</p>
FORM;
    echo "</div></body></html>";
    exit;
}

// A range was supplied: print the effective settings, close the toolbar and
// open the container that receives the per-image log lines.
echo "<p>📌 扫描范围: ID {$startid} ~ {$endid} | 每批处理: {$step} 篇 | 超时: {$timeout}秒 | 最小有效图片: {$min_size} 字节</p>",
    "</div><div class='log'>";
// Load one batch of article bodies: every row whose aid lies in
// [$startid, $startid + $step) and does not exceed $endid.
// NOTE(review): the table name hard-codes the `dede_` prefix; confirm it
// matches the prefix configured for this installation.
$sql = "SELECT aid, body FROM `dede_addonarticle` WHERE aid >= {$startid} AND aid < (" . ($startid + $step) . ") AND aid <= {$endid} ORDER BY aid ASC";
$dsql->SetQuery($sql);
$dsql->Execute();
$count = 0;                 // number of <img> tags examined in this batch
$processed = 0;             // number of articles visited in this batch
$last_aid = $startid - 1;   // highest aid seen so far
$url_results = array();     // URL => verdict, so a repeated URL is only fetched once per request
while ($row = $dsql->GetArray()) {
    // aid is interpolated into HTML and SQL below, so force it to an integer.
    $aid = intval($row['aid']);
    $body = $row['body'];
    $has_change = false;
    $last_aid = $aid;
    // Capture the src of every <img>. The attribute must either directly follow
    // "<img " or be preceded by whitespace, so look-alike attributes such as
    // data-src / data-original are not mistaken for the real source.
    $found = preg_match_all('/<img\s+(?:[^>]*?\s)?src=[\'"]([^\'"]+)[\'"][^>]*>/i', $body, $matches, PREG_SET_ORDER);
    if ($found) {
        foreach ($matches as $match) {
            $url = $match[1];
            $full_tag = $match[0];
            if (!isset($url_results[$url])) {
                $url_results[$url] = check_image_valid($url, $timeout, $min_size);
            }
            $check_result = $url_results[$url];
            if ($check_result === true) {
                echo "<div class='log-item'><span class='success'>✅ 有效</span> 文章 #{$aid} 图片: " . htmlspecialchars($url) . "</div>";
            } else {
                // Drop the tag; str_replace removes every identical occurrence in the body.
                $body = str_replace($full_tag, '', $body);
                $has_change = true;
                // The reason may embed a Content-Type header sent by a remote
                // server, so it has to be escaped before it is echoed.
                $reason = htmlspecialchars(is_string($check_result) ? $check_result : '检测失败');
                echo "<div class='log-item'><span class='error'>❌ 已移除 ({$reason})</span> 文章 #{$aid} 图片: " . htmlspecialchars($url) . "</div>";
            }
            $count++;
        }
    }
    // Write the body back only when at least one tag was removed.
    if ($has_change) {
        $clean_body = addslashes($body);
        $up_sql = "UPDATE `dede_addonarticle` SET body='{$clean_body}' WHERE aid='{$aid}'";
        $dsql->ExecuteNoneQuery($up_sql);
    }
    $processed++;
}
// Continue with the next batch, or finish.
// The query above covers the ID window [$startid, $startid + $step), so the
// next window starts right after it whether or not this one contained any
// rows. Deciding by "rows were found" would end the whole run at the first gap
// in article IDs that is wider than one batch.
$next_start = $startid + $step;
if ($step > 0 && $next_start <= $endid) {
    $batch_end = $next_start - 1;
    echo "<hr><div class='info'>⏳ 已完成 ID {$startid} ~ {$batch_end},正在继续下一批...</div>";
    // All interpolated values are integers; "&amp;" keeps the attribute valid HTML.
    echo "<meta http-equiv='refresh' content='1;url=?startid={$next_start}&amp;endid={$endid}&amp;step={$step}&amp;timeout={$timeout}&amp;min_size={$min_size}'>";
} else {
    // $count and $processed are reset on every request, so the figures shown
    // here describe the final batch only, not the whole run.
    echo "<hr><div class='success'>✅ 全部清理完成!共检测图片 {$count} 次,涉及文章 {$processed} 篇。</div>";
    echo "<br><a href='?'>🔙 返回首页重新开始</a>";
}
/**
 * Decide whether an <img> source points at a usable image.
 *
 * Checks performed:
 *  - data: URIs are accepted when they declare an image/* media type (nothing to fetch).
 *  - Protocol-relative URLs ("//host/path") are resolved with the current request's scheme.
 *  - Anything that is not http(s) is looked up as a file below DOCUMENT_ROOT.
 *  - Remote URLs: HEAD request (status code + Content-Type), then a capped GET
 *    whose bytes must reach $min_size and parse as an image.
 *
 * @param string $url      Image address taken from the src attribute
 * @param int    $timeout  Timeout in seconds for each HTTP request (values below 1 are raised to 1)
 * @param int    $min_size Minimum number of bytes for the image to count as valid
 * @return bool|string true when the image is valid, otherwise a string with the failure reason
 */
function check_image_valid($url, $timeout = 5, $min_size = 1024) {
    $url = trim((string) $url);
    if ($url === '') return '空地址';

    // Inline images carry their own data; there is no link that could be dead.
    if (strncasecmp($url, 'data:', 5) === 0) {
        return strncasecmp($url, 'data:image/', 11) === 0 ? true : '非图片类型:data URI';
    }

    // "//cdn.example.com/a.jpg" is a remote URL without a scheme, not a local path.
    if (strncmp($url, '//', 2) === 0) {
        $is_https = !empty($_SERVER['HTTPS']) && strtolower((string) $_SERVER['HTTPS']) !== 'off';
        $url = ($is_https ? 'https:' : 'http:') . $url;
    }

    // Schemes are case-insensitive ("HTTP://..." is still remote).
    if (!preg_match('#^https?://#i', $url)) {
        return check_image_valid_local($url, $min_size);
    }

    // A timeout of 0 would mean "no limit" to cURL.
    $timeout = max(1, (int) $timeout);

    // Step 1: HEAD request for status code and Content-Type.
    $head = check_image_valid_request($url, $timeout, 0);
    if ($head['errno'] !== 0) return "连接失败(error:{$head['errno']})";
    // Some servers refuse HEAD outright (405/501); that says nothing about the
    // image itself, so defer the status/type checks to the GET probe.
    $head_refused = ($head['code'] == 405 || $head['code'] == 501);
    if (!$head_refused) {
        if ($head['code'] != 200) return "HTTP状态码:{$head['code']}";
        if (stripos($head['type'], 'image/') !== 0) return "非图片类型:{$head['type']}";
    }

    // Step 2: fetch the leading bytes. 16 KB is enough to parse the image
    // header; the probe grows when $min_size is larger so the size check below
    // can still be satisfied.
    $probe_bytes = max(16384, (int) $min_size);
    $get = check_image_valid_request($url, $timeout, $probe_bytes);
    $download_size = strlen($get['body']);
    if ($get['errno'] !== 0 || $download_size == 0) return "无法获取图片数据";
    if ($get['code'] != 200 && $get['code'] != 206) return "HTTP状态码:{$get['code']}";
    if ($head_refused && stripos($get['type'], 'image/') !== 0) return "非图片类型:{$get['type']}";
    if ($download_size < $min_size) return "图片数据过小({$download_size}字节)";

    // Step 3: the bytes must parse as a real image; warnings from malformed
    // data are suppressed because a false return is the expected signal here.
    $info = @getimagesizefromstring($get['body']);
    if ($info === false) return "非有效图片格式";
    return true;
}

/**
 * Validate an image referenced by a site-relative path (helper of check_image_valid).
 *
 * The path is tried as written, then without query string / fragment, then
 * percent-decoded, each time relative to DOCUMENT_ROOT.
 *
 * @param string $url      Non-http(s) src value
 * @param int    $min_size Minimum file size in bytes
 * @return bool|string true when valid, otherwise the failure reason
 */
function check_image_valid_local($url, $min_size) {
    $root = isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : '';
    $candidates = array($url);
    $path_only = parse_url($url, PHP_URL_PATH);
    if (is_string($path_only) && $path_only !== '') {
        $candidates[] = $path_only;                 // "/a.jpg?v=2#x" -> "/a.jpg"
        $candidates[] = rawurldecode($path_only);   // "/a%20b.jpg"   -> "/a b.jpg"
    }
    $local_path = null;
    foreach (array_unique($candidates) as $candidate) {
        $try = $root . '/' . ltrim($candidate, '/');
        if (is_file($try)) {
            $local_path = $try;
            break;
        }
    }
    if ($local_path === null) return '本地文件不存在';
    $size = (int) filesize($local_path);
    if ($size < $min_size) return "本地文件过小({$size}字节)";
    // Confirm the file really is an image, not just something with an image name.
    $info = @getimagesize($local_path);
    if ($info === false) return '本地文件非图片格式';
    return true;
}

/**
 * Perform one HTTP request for check_image_valid.
 *
 * @param string $url       Absolute http(s) URL
 * @param int    $timeout   Connect and total timeout in seconds
 * @param int    $max_bytes 0 sends a HEAD request; a positive value sends a GET
 *                          that asks for, and keeps roughly, the first $max_bytes bytes
 * @return array 'errno' (cURL error number, 0 on success), 'code' (HTTP status),
 *               'type' (Content-Type, '' when absent), 'body' (bytes received, '' for HEAD)
 */
function check_image_valid_request($url, $timeout, $max_bytes) {
    $result = array('errno' => CURLE_FAILED_INIT, 'code' => 0, 'type' => '', 'body' => '');
    $ch = curl_init($url);
    if ($ch === false) return $result;

    $body = '';
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Certificates are deliberately not verified: an expired certificate on the
    // image host must not cause the image to be deleted from the article.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    if ($max_bytes > 0) {
        curl_setopt($ch, CURLOPT_RANGE, '0-' . ($max_bytes - 1));
        // Servers may ignore Range and send the whole file. Collect the data in
        // a callback and abort once enough has arrived, instead of downloading
        // everything or running into the timeout.
        curl_setopt($ch, CURLOPT_WRITEFUNCTION, function ($handle, $chunk) use (&$body, $max_bytes) {
            if (strlen($body) >= $max_bytes) {
                return 0; // a short count makes cURL abort with CURLE_WRITE_ERROR
            }
            $body .= $chunk;
            return strlen($chunk);
        });
    } else {
        curl_setopt($ch, CURLOPT_NOBODY, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    }
    curl_exec($ch);
    $errno = curl_errno($ch);
    $result['code'] = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $result['type'] = (string) curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
    curl_close($ch);

    // The deliberate abort above is not a failure.
    if ($errno === CURLE_WRITE_ERROR && $max_bytes > 0 && strlen($body) >= $max_bytes) {
        $errno = 0;
    }
    $result['errno'] = $errno;
    $result['body'] = $body;
    return $result;
}
// Runs after the batch logic above (function declarations do not interrupt the
// top-level flow): close the log container and the document.
echo '</div></body></html>';
?>